TREES
Photo of Nemonte Nenquimo
And now, as a woman, as a mother, as a water protector and a forest defender,
I want you to join us in our fight to defend our way of life, our forests and our planet…
— Nemonte Nenquimo
Sunburst charts are visually appealing, interactive charts that are ideal for hierarchical data. Each level of the hierarchy is represented by one ring, with the innermost ring representing the root node and the hierarchy moving outwards from it. Each ring is then divided into segments, one per categorical data point, placed according to its hierarchical relationship to the parent slice. The size of each segment can either be divided equally under its parent node or be made proportional to a value.
# Path to the source CSV, relative to the current working directory.
df_file_path <- "archetypes/top-100-influential-women/top-100-influential-women.csv"

# Read the raw table. Text columns stay as character vectors and strings are
# marked as UTF-8.
df <- read.csv(
  file = df_file_path,
  header = TRUE,
  stringsAsFactors = FALSE,
  encoding = "UTF-8"
)

# Print the raw data for inspection.
df
# Prepare the working table used to build the hierarchy:
#   * keep only the columns that are needed;
#   * add the UN region name for each country; where countrycode() finds no
#     match, fall back to the country text itself (an alternative would be
#     a generic entry: replace_na(list(region = "World")));
#   * add a row ID and a constant SIZE of 1 to be summed during aggregation;
#   * suffix every country with its row ID so each row gets its own country
#     vertex and duplicate vertices are avoided.
# The country suffix is applied last so the region lookup sees the raw name.
df_wrangle <- df %>%
  select(name, category, country, role) %>%
  mutate(
    region = countrycode(
      country,
      origin = "country.name",
      destination = "un.region.name"
    ),
    region = ifelse(is.na(region), country, region),
    ID = row_number(),
    SIZE = 1,
    country = paste0(country, "_", ID)
  )

# Print the prepared table for inspection.
df_wrangle
# Exclude rows whose category is the root label "Top 100".
df_filtered <- df_wrangle %>%
  filter(category != "Top 100")

# Edge tables share the structure FROM / TO / SIZE, where SIZE is the summed
# SIZE of the rows behind each edge. Naming the `x` and `by` lists makes
# aggregate() emit the final column names directly.

# Root -> category.
df_edges_0 <- aggregate(
  x = list(SIZE = df_filtered$SIZE),
  by = list(TO = df_filtered$category),
  FUN = sum
)
df_edges_0$FROM <- "Top 100"

# Category -> country (country values carry the per-row ID suffix).
df_edges_1 <- aggregate(
  x = list(SIZE = df_filtered$SIZE),
  by = list(FROM = df_filtered$category, TO = df_filtered$country),
  FUN = sum
)

# Country -> name.
df_edges_2 <- aggregate(
  x = list(SIZE = df_filtered$SIZE),
  by = list(FROM = df_filtered$country, TO = df_filtered$name),
  FUN = sum
)

# Stack the three levels (rbind matches data frame columns by name) and put
# the columns in the standard order.
df_edges <- rbind(df_edges_0, df_edges_1, df_edges_2)[, c("FROM", "TO", "SIZE")]
df_edges
# Every node name that appears on either end of an edge, in a single NODE
# column. Only used for validating the node table when needed.
df_node_list <- data.frame(NODE = c(df_edges$FROM, df_edges$TO))
#df_node_list
# Node tables share the columns NODE / SIZE / COLOR, where COLOR holds the
# category used later to pick the fill colour.

# Root and category nodes: built from the unfiltered table so the "Top 100"
# category is included; each one is coloured by its own name.
df_nodes_1 <- aggregate(
  x = list(SIZE = df_wrangle$SIZE),
  by = list(NODE = df_wrangle$category),
  FUN = sum
)
df_nodes_1$COLOR <- df_nodes_1$NODE

# Country nodes (middle ring), coloured by their category.
df_nodes_2 <- aggregate(
  x = list(SIZE = df_filtered$SIZE),
  by = list(NODE = df_filtered$country, COLOR = df_filtered$category),
  FUN = sum
)

# Name nodes (leaves), coloured by their category.
# Non-aggregating alternative: df_filtered %>% select(name, category, SIZE)
df_nodes_3 <- aggregate(
  x = list(SIZE = df_filtered$SIZE),
  by = list(NODE = df_filtered$name, COLOR = df_filtered$category),
  FUN = sum
)

# Stack all node levels; rbind matches data frame columns by name.
df_nodes <- rbind(df_nodes_1, df_nodes_2, df_nodes_3)
df_nodes
# Sanity checks, inspected only when needed:
#   test_1 - nodes in the node table that no edge refers to
#   test_2 - edge endpoints that are missing from the node table
test_1 <- df_nodes %>%
  anti_join(df_node_list, by = "NODE")
#test_1
test_2 <- df_node_list %>%
  anti_join(df_nodes, by = "NODE")
#test_2

# Build the graph object from the edge list, attaching the node attributes.
df_graph <- graph_from_data_frame(
  df_edges,
  vertices = df_nodes
)
# Shared theme tweaks: font family, page margins (in inches), and no legend.
theme_opts <- theme(
  text = element_text(family = "inconsolata"),
  plot.margin = unit(c(1.5, 1, 1, 1), "in"),
  legend.position = "none"
)

# Fill colours keyed by category; the root and all leaf arcs are white.
category_palette <- c(
  "Top 100" = "#FFFFFF",
  "LEAF" = "#FFFFFF",
  "Creativity" = "#E8F5E9",
  "Identity" = "#E3F2FD",
  "Knowledge" = "#F3E5F5",
  "Leadership" = "#FCE4EC"
)

# Circular partition (sunburst) layout, with arcs weighted by SIZE.
v1 <- ggraph(df_graph, layout = "partition", circular = TRUE, weight = SIZE) +
  # Arcs: leaves use the "LEAF" colour, all other nodes their COLOR value.
  geom_node_arc_bar(
    aes(fill = ifelse(leaf, "LEAF", COLOR)),
    color = "#ffffff"
  ) +
  scale_fill_manual(values = category_palette) +
  # Boxed labels only for nodes at depth 0 and 1.
  geom_node_label(
    aes(label = name, filter = depth < 2),
    size = 6,
    label.size = NA,
    family = "inconsolata"
  ) +
  # Leaf names, rotated to follow the angle of their arc.
  geom_node_text(
    aes(filter = leaf, angle = node_angle(x, y), label = name),
    hjust = 0,
    size = 3,
    family = "inconsolata"
  ) +
  # Fixed aspect ratio; clipping is off so text may extend past the panel.
  coord_fixed(clip = "off") +
  theme_void() +
  theme_opts

# Render as an interactive SVG widget that rescales to the full width.
girafe(
  ggobj = v1,
  width_svg = 1280 / 72,
  height_svg = 720 / 72,
  options = list(opts_sizing(rescale = TRUE, width = 1.0))
)
# pg <- ggplot_build(v1)
# pg$data[[2]]
# pg$data[[3]]
# Build the input for the sund2b() sunburst: one dash-delimited path per row
# (category-country-role-name) plus a constant weight.
#
# Because "-" is the separator between path levels, literal dashes are
# removed from every path component before the path is assembled. The
# earlier version stripped them only from `role` and `name`, so a dash in
# `category` or `country` would have split that value into extra levels.
# A plain mutate() also replaces the deprecated funs() helper and the
# fragile positional vars(3, 4) selection.
total <- df %>%
  select(category, country, role, name) %>%
  mutate(
    category = gsub("-", "", category, fixed = TRUE),
    country = gsub("-", "", country, fixed = TRUE),
    role = gsub("-", "", role, fixed = TRUE),
    name = gsub("-", "", name, fixed = TRUE),
    path = paste(category, country, role, name, sep = "-")
  ) %>%
  # Keep rows 2 to 100, i.e. skip the first row of the raw table.
  # NOTE(review): presumably row 1 is the "Top 100" placeholder entry that
  # is filtered out by category elsewhere in this script - confirm.
  slice(2:100) %>%
  # Constant weight so every path contributes equally to the chart.
  mutate(V2 = 2)

# xtabs() sums the weight per unique path; sund2b() draws the hierarchy.
# NOTE(review): rootLabel is 'continent' although the first path level is
# the category - confirm the intended label.
sund2b(
  data = data.frame(xtabs(V2 ~ path, total)),
  rootLabel = 'continent',
  showLabels = TRUE,
  colors = list(range = RColorBrewer::brewer.pal(9, "Set3"))
)